# Import the Excel input files into Python using pandas
import pandas as pd
pd.options.plotting.backend = "plotly"
import plotly.io as pio
import plotly.graph_objects as go
import numpy as np


# Show every figure in the web browser; from there a figure can be exported
# as .png.
pio.renderers.default = 'browser'

# --- Flags -------------------------------------------------------------------
# autoscale:   if True, use autoscale on figures (not read anywhere in the
#              part of the script visible here).
# save_images: if True, save images to disk.
autoscale = True
save_images = True

# --- Directories -------------------------------------------------------------
# Image_dir is the prefix of every image file written by the plotting code.
Image_dir = "C:/Michael/Albany LAIO Lab and Research Projects/Is a profile worth/images/"
Data_dir = "C:/Michael/Albany LAIO Lab and Research Projects/Is a profile worth/data/"

# --- Input workbooks ---------------------------------------------------------
_scores_workbook = "C:/Michael/Albany LAIO Lab and Research Projects/Is a profile worth/input data/TALID Version 1 Scores_by_Document.xlsx"
_mean_ranges_workbook = "C:/Michael/Albany LAIO Lab and Research Projects/Is a profile worth/input data/Observed Mean Ranges.xlsx"

# Scores by document: the table the author totals and running scores are
# computed from.
Document_Data = pd.read_excel(io=_scores_workbook)

# Observed mean ranges: rows 0 and 1 of each variable's column are used as the
# y-axis limits of the zoomed figures.
Mean_Ranges_DF = pd.read_excel(io=_mean_ranges_workbook)


# Select the leaders whose documents total at least 10,000 words.

# Total only the Word_Count column per author. Summing every column (the
# previous approach) also aggregates columns that are never used here, which
# is wasted work and can raise for column types that do not support addition.
Temp_DF = Document_Data.groupby('FirstOfAuthor', as_index=False)['Word_Count'].sum()

# Keep the authors above the word threshold (> 9999, i.e. >= 10,000 for
# whole-number word counts).
Temp_DF = Temp_DF[Temp_DF['Word_Count'] > 9999]

# Names of the qualifying authors; each name appears once.
unique_values = Temp_DF['FirstOfAuthor'].unique()

# Accumulates one output record (a dict) per author and document.
OutputRows = []

# Debug aid: uncomment to list the selected authors.
# for name in unique_values:
#     print(name)

# ---------------------------------------------------------------------------
# Cumulative indicator scores by document.
#
# For every selected author the documents are walked in table order while
# running totals of the word count and of every raw count column are kept.
# After each document one record is emitted holding the scores computed from
# the totals so far, together with the number of observations behind each
# score.
# ---------------------------------------------------------------------------

# Ratio scores: score = first / (first + second), on running totals.
# Entries are (score name, first column, second column).
_RATIO_SCORES = (
    ('BACE', 'IC', 'EC'),
    ('CC', 'HC', 'LC'),
    ('DIS', 'HD', 'LD'),
    ('IGB', 'HB', 'LB'),
    ('PWR', 'HP', 'LP'),
    ('SC', 'HS', 'LS'),
    ('TASK', 'HT', 'LT'),
)

# Per-100-words scores: score = (count / Words) * 100.
# Entries are (score name, count column).
_PER_100_WORDS_SCORES = (
    ('nACH-100', 'nACH'),
    ('nAFF-100', 'nAFF'),
    ('nPWR-100', 'nPWR'),
)

# Balance scores: score = (positive - negative) / (positive + negative).
# Each side sums three input columns named "<subject> <act>".
# Entries are (score name, subject).
_BALANCE_SCORES = (('I1', 'self'), ('P1', 'other'))
_POSITIVE_ACTS = ('appeal', 'promise', 'reward')
_NEGATIVE_ACTS = ('punish', 'threaten', 'oppose')


def _safe_divide(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero.

    NaN is used instead of a text marker so that the score columns stay
    numeric for the plotting and line fitting done later in the script. The
    explicit zero check also covers numpy scalars, which do not raise
    ZeroDivisionError.
    """
    if denominator == 0:
        return float('nan')
    return numerator / denominator


def _running_score_rows(author, documents):
    """Return one record per document holding the author's running scores.

    Args:
        author: Value stored in the 'Author' field of every record.
        documents: DataFrame with the author's documents; rows are
            accumulated in the order given.

    Returns:
        A list of dicts, one per row of ``documents``. Each dict holds
        'Author', 'Document' (the row's Filename), 'Words' (running word
        count) and, for every score, the score value and a matching
        '<score>-observations' entry. A score whose denominator is zero
        is NaN.
    """
    count_columns = [column
                     for _, first, second in _RATIO_SCORES
                     for column in (first, second)]
    count_columns += [column for _, column in _PER_100_WORDS_SCORES]

    # Running totals, all starting from zero.
    totals = dict.fromkeys(['Word_Count'] + count_columns, 0)
    # subject -> [positive total, negative total]
    balance_totals = {subject: [0, 0] for _, subject in _BALANCE_SCORES}

    records = []
    for _, row in documents.iterrows():
        # Update the accumulators with this document's counts.
        for column in totals:
            totals[column] += row[column]
        for subject, sides in balance_totals.items():
            sides[0] += sum(row[subject + ' ' + act] for act in _POSITIVE_ACTS)
            sides[1] += sum(row[subject + ' ' + act] for act in _NEGATIVE_ACTS)

        # Build the record in the column order expected in the output table.
        words = totals['Word_Count']
        record = {'Author': author, 'Document': row['Filename'], 'Words': words}

        for score, first, second in _RATIO_SCORES:
            observations = totals[first] + totals[second]
            record[score] = _safe_divide(totals[first], observations)
            record[score + '-observations'] = observations

        for score, column in _PER_100_WORDS_SCORES:
            record[score] = _safe_divide(totals[column], words) * 100
            record[score + '-observations'] = totals[column]

        for score, subject in _BALANCE_SCORES:
            positive, negative = balance_totals[subject]
            observations = positive + negative
            record[score] = _safe_divide(positive - negative, observations)
            record[score + '-observations'] = observations

        records.append(record)
    return records


for name in unique_values:
    # Rows of the current author only.
    author_rows = Document_Data[Document_Data['FirstOfAuthor'] == name]
    OutputRows.extend(_running_score_rows(name, author_rows))

OutputDF = pd.DataFrame.from_records(OutputRows)


# Variables to plot, each paired with the column that counts its observations:
#   pair[0] is the variable name,
#   pair[1] is the matching observation variable.
# The plotting code further down walks this list and, for each variable, adds
# one trace per author to the figures.
_plotted_variables = ('BACE', 'CC', 'DIS', 'IGB', 'PWR', 'SC', 'TASK',
                      'nACH-100', 'nAFF-100', 'nPWR-100', 'I1', 'P1')

List_of_variable_pairs = [[variable, variable + '-observations']
                          for variable in _plotted_variables]
                          

# ---------------------------------------------------------------------------
# Figures per variable.
#
# For every variable in List_of_variable_pairs three figures are shown (and
# saved when save_images is set):
#   * a histogram of the per-leader slopes of the variable against words,
#   * every leader's variable against running word count, 0 to 150000 words,
#   * the same traces zoomed to 40000-80000 words, with the y-axis limited to
#     rows 0 and 1 of Mean_Ranges_DF for that variable.
# ---------------------------------------------------------------------------

# Only documents whose running word count exceeds this feed the slope fit.
# NOTE(review): the histogram file name says "greater than 40K" and the zoomed
# figure starts at 40000, yet the cut-off applied is 39000 - confirm intent.
SLOPE_MIN_WORDS = 39000

# A leader's slope is fitted only when more than this many points are usable.
SLOPE_MIN_POINTS = 10


def _leader_trace(leader, words, scores):
    """Return a line trace of one leader's scores against running words."""
    return go.Scatter(x=words,
                      y=scores,
                      mode='lines',
                      name=leader,  # the trace is named after the author
                      textposition='top center')


def _late_stage_fit(words, scores):
    """Fit a straight line to a leader's late-stage scores.

    Args:
        words: Series of running word counts.
        scores: Numeric Series of scores aligned with ``words``.

    Returns:
        ``(slope, intercept)`` from a degree-1 ``np.polyfit`` over the points
        with more than SLOPE_MIN_WORDS words and a finite score, or ``None``
        when SLOPE_MIN_POINTS or fewer such points exist.
    """
    # NaN/inf scores are left out: they cannot take part in a least-squares fit.
    usable = (words > SLOPE_MIN_WORDS) & np.isfinite(scores)
    if usable.sum() <= SLOPE_MIN_POINTS:
        return None
    slope, intercept = np.polyfit(words[usable], scores[usable], 1)
    return slope, intercept


def _show_and_save(figure, file_name):
    """Show ``figure`` and, when save_images is set, write it to Image_dir."""
    figure.show()
    if save_images:
        # NOTE(review): Plotly's documentation describes the orca engine as
        # deprecated in favour of Kaleido - confirm it is still available in
        # the installed version.
        figure.write_image(Image_dir + file_name, format='svg', scale=2,
                           engine='orca')


# The set of leaders is the same for every variable, so look it up once.
unique_leaders = OutputDF['Author'].unique()

for variable_name, _observations_name in List_of_variable_pairs:

    # Figures the leader traces are added to: full view and zoomed view.
    fig = go.Figure()
    fig1 = go.Figure()

    # (slope, intercept) of every leader with enough late-stage points.
    SlopeRows = []

    for leader in unique_leaders:
        # A single leader's records, ordered by running word count.
        author_rows = OutputDF[OutputDF['Author'] == leader]
        author_rows = author_rows.sort_values(by=['Words'])

        words = author_rows['Words']
        # Coerce to numbers without writing into a slice of OutputDF; entries
        # that are not numeric (e.g. text markers for undefined scores) become
        # NaN and show up as gaps in the lines.
        scores = pd.to_numeric(author_rows[variable_name], errors='coerce')

        fit = _late_stage_fit(words, scores)
        if fit is not None:
            SlopeRows.append(fit)

        fig.add_trace(_leader_trace(leader, words, scores))
        fig1.add_trace(_leader_trace(leader, words, scores))
    # end of leader loop

    # Quick histogram of the fitted slopes.
    Slope_data = pd.DataFrame(SlopeRows, columns=['slope', 'intercept'])
    hist = go.Figure(data=[go.Histogram(x=Slope_data['slope'], nbinsx=100)])
    hist.update_layout(title=variable_name + " Slopes")
    hist.update_xaxes(title="Slope", exponentformat='E')
    hist.update_yaxes(title="Frequency")
    _show_and_save(hist, "Histogram of slopes for " + variable_name
                   + " words greater than 40K.svg")

    # Full view.
    fig.update_layout(title="Leaders: " + variable_name + " by Words",
                      showlegend=False)
    fig.update_xaxes(title="Words", range=[0, 150000])
    fig.update_yaxes(title=variable_name)
    _show_and_save(fig, "Leaders " + variable_name + " by Words.svg")

    # Zoomed view; the y-axis limits come from the observed mean ranges.
    fig1.update_layout(title="Leaders: " + variable_name + " by Words",
                       showlegend=False)
    fig1.update_xaxes(title="Words", range=[40000, 80000])
    fig1.update_yaxes(title=variable_name,
                      range=[Mean_Ranges_DF.at[0, variable_name],
                             Mean_Ranges_DF.at[1, variable_name]])
    _show_and_save(fig1, "Leaders " + variable_name + "zoomed by Words.svg")
# end of variable loop

print("Clean exit")
       